# -*- coding: utf-8 -*-
import scrapy
from douban.items import *

class DoubanGeneralSpiderSpider(scrapy.Spider):
    """Spider that collects discussion titles and links from a Douban group.

    Crawling starts at the first discussion-list page of the
    ``shanghaizufang`` group.

    Pagination rule of the listing: ``start = 25 * (n - 1)`` for page ``n``.
    Only the first page (``start=0``) is listed in ``start_urls``.
    """

    name = 'douban_general_spider'
    allowed_domains = ['www.douban.com']
    start_urls = ['https://www.douban.com/group/shanghaizufang/discussion?start=0']

    def parse(self, response):
        """Yield one ``DoubanItem`` per discussion link found in the page.

        Args:
            response: The downloaded listing page.

        Yields:
            DoubanItem: An item with ``title`` and ``href`` taken from the
            same ``<a>`` element inside a ``td.title`` table cell.
        """
        self.logger.info('开始爬取...')
        # Read both attributes from the same anchor node. Extracting titles
        # and hrefs into two separate lists and pairing them by index would
        # mis-pair entries (or raise IndexError) as soon as one anchor lacks
        # either attribute.
        for link in response.xpath('//table//td[@class="title"]/a'):
            title = link.xpath('@title').extract_first()
            href = link.xpath('@href').extract_first()
            if title is None or href is None:
                # Incomplete anchor: skip it rather than emit a broken item.
                continue
            item = DoubanItem()
            item['title'] = title
            item['href'] = href
            yield item
